Load in packages which are needed
Use library(ggplot2) to load the package.
library(ggplot2)
The data set being used is the ‘diamonds’ data set. You can find out more
about it using ?diamonds.
What does ggplot(diamonds) do?
# Only the data set is supplied: no aesthetics and no layers.
ggplot(data = diamonds)
Answer: creates a grey block, this is because we haven’t said what we want on the x axis or y axis, or any other information
What does ggplot(diamonds, aes(x = carat, y = price))
do?
# Map carat to x and price to y; no geom layer is added yet.
ggplot(data = diamonds, mapping = aes(x = carat, y = price))
Answer: this command defines the x and y axes. Note that the range is not random (it is based on the points in the data set). However, we haven’t said how we want our data to be plotted - we will add this now.
# Plot carat on the x-axis and price on the y-axis.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()
# Colour the points by cut.
ggplot(diamonds, aes(x = carat, y = price, colour = cut)) +
  geom_point()
This has created a plot in which points are coloured by cut
# Points coloured by cut, with a smoothed trend line layered on top.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = cut)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
geom_smooth has added a trendline
# Store the plot in obds_diamonds so it can be reused later, then draw it.
obds_diamonds <- ggplot(diamonds, aes(x = carat, y = price, colour = cut)) +
  geom_point() +
  geom_smooth()
print(obds_diamonds)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Predict the difference between these two plots
# Plot 1: colour = cut is set in ggplot(), so every layer inherits it.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = cut)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Plot 2: colour = cut is set only inside geom_point(), not in ggplot().
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(mapping = aes(colour = cut)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Answer: Plot 2 has only one trendline, whereas plot 1 has several trendlines for each different cut. This is because colour=cut was included in the ggplot() command for plot 1, but not for plot 2
Using the ChickWeight data set:
# Weight against time for every observation in ChickWeight.
ggplot(data = ChickWeight, mapping = aes(x = Time, y = weight)) +
  geom_point()
# Same scatter plot, with points coloured by Diet.
ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, colour = Diet)
) +
  geom_point()
# Add a smoothed trend line per Diet on top of the coloured points.
ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, colour = Diet)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
At this point you should be able to visually identify the diet that leads to the highest mean increase in weight.
Answer: diet 3 leads to the highest increase as this trendline is above the rest
# One panel per Diet (two rows of panels), each with a linear-model fit.
ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, colour = Diet)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ Diet, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
# Store the faceted plot in obds_chickweight so it can be reused later.
# The plot is only assigned here, not drawn.
obds_chickweight <- ggplot(
  ChickWeight,
  aes(x = Time, y = weight, colour = Diet)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ Diet, nrow = 2)
# Using the ggplot2::msleep data set; printing it shows a preview of the table.
ggplot2::msleep
## # A tibble: 83 × 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Cheet… Acin… carni Carn… lc 12.1 NA NA 11.9
## 2 Owl m… Aotus omni Prim… <NA> 17 1.8 NA 7
## 3 Mount… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
## 4 Great… Blar… omni Sori… lc 14.9 2.3 0.133 9.1
## 5 Cow Bos herbi Arti… domesticated 4 0.7 0.667 20
## 6 Three… Brad… herbi Pilo… <NA> 14.4 2.2 0.767 9.6
## 7 North… Call… carni Carn… vu 8.7 1.4 0.383 15.3
## 8 Vespe… Calo… <NA> Rode… <NA> 7 NA NA 17
## 9 Dog Canis carni Carn… domesticated 10.1 2.9 0.333 13.9
## 10 Roe d… Capr… herbi Arti… lc 3 NA NA 21
## # … with 73 more rows, and 2 more variables: brainwt <dbl>, bodywt <dbl>
Type the name of the data-set to see a bit more about it
# Count the rows in each taxonomic order and draw the counts as bars.
ggplot(data = ggplot2::msleep, mapping = aes(x = order)) +
  geom_bar()
Note: must use geom_bar, not geom_histogram, as the data is not continuous
# Same bar chart with the x-axis tick labels rotated, resized and
# right-justified.
ggplot(data = ggplot2::msleep, mapping = aes(x = order)) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1)
  )
Use theme() to change aspects of the x-axis text. axis.text.x must be
specified to ensure that only the x-axis text is affected. element_text() is
used to define angle, size, and hjust (justification; 1 = right-justified).
# Add a title and descriptive axis titles to the bar chart.
ggplot(data = ggplot2::msleep, mapping = aes(x = order)) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1)
  ) +
  labs(
    title = "msleep dataset analysis",
    x = "Taxonomic Order",
    y = "Number of Animals"
  )
Using theme can change the font size etc. for the axis titles, but labs must be used to change the words of the titles
From this point onwards, you may need to iteratively resize the text of the ticks and axes for readability.
# Fill each bar by genus; this adds one legend entry per genus.
ggplot(
  data = ggplot2::msleep,
  mapping = aes(x = order, fill = genus)
) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1)
  ) +
  labs(
    title = "msleep dataset analysis",
    x = "Taxonomic Order",
    y = "Number of Animals"
  )
Adding fill=genus will colour the bars by genus, but makes the legend very large - need to fix this
# Shrink the legend keys with legend.key.size = unit(2, "mm").
# (Copy-pasted and extended from the code chunk above.)
ggplot(ggplot2::msleep, aes(x = order, fill = genus)) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1),
    legend.key.size = unit(2, "mm")
  ) +
  labs(
    x = "Taxonomic Order",
    y = "Number of Animals",
    title = "msleep dataset analysis"
  )
Using the command legend.key.size we can alter the size of each key in the legend - by setting unit(2,“mm”) we have reduced the size
# Lay the legend out in columns with guides(fill = guide_legend(...)).
# (Copy-pasted and extended from the code chunk above.)
ggplot(ggplot2::msleep, aes(x = order, fill = genus)) +
  geom_bar() +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1),
    legend.key.size = unit(2, "mm")
  ) +
  labs(
    x = "Taxonomic Order",
    y = "Number of Animals",
    title = "msleep dataset analysis"
  ) +
  guides(fill = guide_legend(ncol = 3))
Using the guides function, we set the number of columns in the legend to 3
# Draw a thin black outline around each genus segment of the stacked bars.
ggplot(
  data = ggplot2::msleep,
  mapping = aes(x = order, fill = genus)
) +
  geom_bar(colour = "black", size = 0.1) +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1),
    legend.key.size = unit(2, "mm")
  ) +
  labs(
    title = "msleep dataset analysis",
    x = "Taxonomic Order",
    y = "Number of Animals"
  ) +
  guides(fill = guide_legend(ncol = 3))
Adding colour=“black” to the geom_bar function draws a black outline around each genus segment for clarity. Adding size=0.1 reduces the thickness of that outline so that it is not too thick (the default is 0.5).
# Store the finished bar chart so it can be combined with the other plots.
# NOTE(review): the exercise text calls this object obds_msleep, but every
# later chunk refers to obds_sleep, so that is the name assigned here.
obds_sleep <- ggplot(ggplot2::msleep, aes(x = order, fill = genus)) +
  geom_bar(colour = "black", size = 0.1) +
  theme(
    axis.text.x = element_text(angle = 45, size = 9, hjust = 1),
    legend.key.size = unit(2, "mm")
  ) +
  labs(
    x = "Taxonomic Order",
    y = "Number of Animals",
    title = "msleep dataset analysis"
  ) +
  guides(fill = guide_legend(ncol = 3))
Collate the plots that we assigned to objects through the day into a single plot.
Plots: obds_diamonds, obds_chickweight,
obds_msleep.
Methods: cowplot::plot_grid(),
patchwork, gridExtra::grid.arrange().
cowplot: Save each plot as its own row, then use cowplot to combine the rows into one plot.
library(cowplot)
# Wrap each stored plot in its own labelled, single-cell grid.
first_row <- cowplot::plot_grid(
  obds_diamonds,
  labels = "diamonds",
  ncol = 1,
  nrow = 1
)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
second_row <- cowplot::plot_grid(
  obds_chickweight,
  labels = "chickweight",
  ncol = 1,
  nrow = 1
)
## `geom_smooth()` using formula 'y ~ x'
third_row <- cowplot::plot_grid(
  obds_sleep,
  labels = "sleep",
  ncol = 1,
  nrow = 1
)

# Stack the three rows in a single column and display the result.
superplot <- cowplot::plot_grid(
  first_row,
  second_row,
  third_row,
  ncol = 1,
  nrow = 3
)
superplot
patchwork: Use / to place one plot on top of another plot. Use + to place one plot next to another plot.
library(patchwork)
# Stack the three stored plots vertically with patchwork's `/` operator,
# then display the combined figure.
patchwork_plot <- obds_chickweight / obds_diamonds / obds_sleep
patchwork_plot
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
gridExtra: Similar to using cowplot.
library(gridExtra)
# Arrange the three stored plots in one column and keep the returned object.
gridExtra_plot <- gridExtra::grid.arrange(
  obds_chickweight,
  obds_diamonds,
  obds_sleep,
  ncol = 1,
  nrow = 3
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Printing the returned object lists its layout table.
gridExtra_plot
## TableGrob (3 x 1) "arrange": 3 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
We can also try a different way of using the gridExtra software to get a different arrangement of graphs in the file
library(gridExtra)
# Layout matrix: plots 1 and 2 share the top row, plot 3 spans the bottom row.
layout <- matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE)
gridExtra_plot_2 <- gridExtra::grid.arrange(
  obds_diamonds,
  obds_chickweight,
  obds_sleep,
  layout_matrix = layout
)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using formula 'y ~ x'
You will likely need a few attempts to fine-tune the width and height of the output file.
We can use ggsave to save the various plots that we have created. We give the output file name (including the file extension), the plot to save, and the width/height.
# Write each combined figure to its own PDF, all at the same page size.
ggsave(
  filename = "super_plot.pdf",
  plot = superplot,
  width = 10,
  height = 15
)
ggsave(
  filename = "patchwork_plot.pdf",
  plot = patchwork_plot,
  width = 10,
  height = 15
)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggsave(
  filename = "gridExtra_plot.pdf",
  plot = gridExtra_plot,
  width = 10,
  height = 15
)
Explore ggplot2::mpg and generate the most informative plot that you can!
geom_abline is used to draw a line on the plots (this is unrelated to the data; it just draws a slope that can be used for comparison). We have also added linetype (dashed) and size specifications in the function to define these aspects.
panel.grid.minor=element_blank is being used to remove the minor grid lines - this can help with clarity
facet_wrap function is needed to draw an individual plot for each manufacturer
labs has been used to add more informative x and y labels to the plot
strip.text=element_text in the theme function allows us to define the size of the text above each plot (i.e. the car manufacturer name)
# City vs highway fuel economy in ggplot2::mpg, one panel per manufacturer.
# year is wrapped in factor() so the points get discrete colours and a
# discrete legend instead of a continuous colour gradient.
ggplot(ggplot2::mpg, aes(x = cty, y = hwy, colour = factor(year))) +
  geom_point(size = 0.5) +
  # Dashed y = x reference line; points above it have hwy greater than cty.
  geom_abline(intercept = 0, slope = 1, size = 0.1, linetype = "dashed") +
  facet_wrap(~ manufacturer) +
  theme(
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 10)
  ) +
  # cty and hwy are miles per gallon, so label them as fuel economy.
  labs(
    x = "City fuel economy (mpg)",
    y = "Highway fuel economy (mpg)",
    colour = "Year"
  )